5.2.1 构建训练集和测试集

## Method 1: k-fold cross-validation.
## Build the testing and training sets: 20% / 80%.
# Keep only the coordinates, with explicit column names. (cbind.data.frame()
# on `rattler$lon` / `rattler$lat` would name the columns "rattler$lon" and
# "rattler$lat", which are awkward to reference later.)
rattlerocc <- data.frame(lon = rattler$lon, lat = rattler$lat)
# Assign every record to one of 5 folds.
fold <- kfold(rattlerocc, k = 5)
# Fold 1 (one fifth of the folds) is held out for testing; folds 2-5 are used
# for training.
rattlertest <- rattlerocc[fold == 1, ]
rattlertrain <- rattlerocc[fold != 1, ]

## 注意不仅仅是发生点数据需要随机分组,背景点同样也需要;
## 参考: https://jcoliver.github.io/learn-r/011-species-distribution-models.html#additional-resources
# Fold number that is held out for testing; all other folds are for training
testing.group <- 1

# Assign each observation to one of 5 folds (kfold() is in the dismo package)
group.presence <- kfold(x = obs.data, k = 5)

# Split the observations: held-out fold -> testing, everything else -> training
presence.is.test <- group.presence == testing.group
presence.test <- obs.data[presence.is.test, ]
presence.train <- obs.data[!presence.is.test, ]

# Same procedure for the pseudo-absence (background) points
group.background <- kfold(x = background, k = 5)
background.is.test <- group.background == testing.group
background.test <- background[background.is.test, ]
background.train <- background[!background.is.test, ]

# Build a model using training data
# NOTE(review): the fitting call was missing from this snippet, so `bc.model`
# was used below without being built from the training partition. The bioclim
# fit is assumed from the `bc.` prefix and the other dismo calls used here --
# confirm this is the intended model.
bc.model <- bioclim(x = bioclim.data, p = presence.train)

# Evaluate the model against the held-out testing data
bc.eval <- evaluate(p = presence.test,   # The presence testing data
                    a = background.test, # The absence testing data
                    model = bc.model,    # The model we are evaluating
                    x = bioclim.data)    # Climatic variables for use by model

# Determine minimum threshold for "presence"
bc.threshold <- threshold(x = bc.eval, stat = "spec_sens")
## Method 2: random sampling to obtain the training and testing sets.
# Fix the RNG seed so the same random split is drawn on every run.
set.seed(1)
# Randomly select 50% of the rows for training.
# Note: sample(x, size) draws `size` elements from `x` (here the row indices);
# the preceding set.seed() call is what makes the draw repeatable.
n_occ <- nrow(occ_final)
# seq_len() is safe when n_occ is 0; floor() makes the truncation of an odd
# row count explicit.
selected <- sample(seq_len(n_occ), floor(n_occ * 0.5))
occ_train <- occ_final[selected, ] # rows used for model training
# Index the complement explicitly: `occ_final[-selected, ]` would return zero
# rows (instead of all rows) whenever `selected` is empty.
occ_test <- occ_final[setdiff(seq_len(n_occ), selected), ]
# Training points in blue, testing points overlaid in red.
plot(occ_train, col = "blue")
plot(occ_test, col = "red", add = TRUE)

## 随机分布点的参数选择:
## 参考: https://jcoliver.github.io/learn-r/011-species-distribution-models.html#additional-resources
## Note: code that can be refined when designing the random background points.
# Draw as many random background points as there are observations.
background <- randomPoints(
  mask = mask,             # Provides resolution of sampling points
  n = nrow(obs.data),      # Number of random points
  ext = geographic.extent, # Spatially restricts sampling
  # NOTE(review): presumably scales the sampling extent `ext`; confirm that
  # 1.25 is the intended factor.
  extf = 1.25
)
## Method 3: block partitioning into evenly sized groups (ENMeval get.block).
library(ENMeval)
# Compute group memberships for the occurrence and background coordinates.
cut_block <- ENMeval::get.block(
  occ = as.data.frame(occ_final@coords),
  bg.coords = as.data.frame(bg@coords)
)
# Store each point's group id alongside its attribute data.
occ_final@data$cut_block <- cut_block$occ.grp
bg@data$cut_block <- cut_block$bg.grp
# Plot all occurrences, then overlay groups 1-4, each in the colour whose
# number matches the group id.
plot(occ_final)
for (block_id in seq_len(4L)) {
  plot(subset(occ_final, cut_block == block_id), col = block_id, add = TRUE)
}
## Other methods from the ENMeval package; see the original paper for details.
# Checkerboard1 partition with an aggregation factor of 5
check1 <- get.checkerboard1(
  occurrences.ok, environments, bg,
  aggregation.factor = 5
)
# Checkerboard1 partition with a different (larger) aggregation factor
check1.large <- get.checkerboard1(
  occurrences.ok, environments, bg,
  aggregation.factor = 30
)
# Checkerboard2 partition, which takes two aggregation factors
check2 <- get.checkerboard2(
  occurrences.ok, environments, bg,
  aggregation.factor = c(5, 5)
)
# k-1 jackknife partition
jack <- get.jackknife(occurrences.ok, bg)
# Random k-fold partition; this example randomly generates 5 bins
random <- get.randomkfold(occurrences.ok, bg, k = 5)
library(blockCV)
library(raster)
# Create a spatial points data frame from the coordinates and the `pa` slot
# of `data`, using the CRS of the predictor rasters.
sp_df <- SpatialPointsDataFrame(
  data@coords,
  data = as.data.frame(data@pa),
  proj4string = crs(predictors)
)
# Build 4 environmental-block folds.
# NOTE(review): species = "data@pa" presumably matches the column name that
# as.data.frame(data@pa) produces above -- keep the two in sync.
e_folds <- envBlock(
  rasterLayer = predictors,
  speciesData = sp_df,
  species = "data@pa",
  k = 4,
  standardization = "standard",
  rasterBlock = FALSE,
  numLimit = 100
)
# Train a Maxnet model using the environmental-block folds.
model <- train(
  method = "Maxnet",
  data = data,
  fc = "l",
  reg = 0.8,
  folds = e_folds
)

results matching ""

    No results matching ""